Mapping Price

Here is the opening code from the exploratory data analysis file, which loads the cleaned listings dataset and creates a box plot of log prices.

Here is the plotly mapbox with just listings mapped.

# Mapbox access token for the plot_mapbox() maps below. A token that is
# already present in the environment (for example from ~/.Renviron) takes
# precedence; otherwise fall back to the token committed with this analysis
# so the document still renders.
# NOTE(review): this token is checked into source -- consider rotating it and
# supplying it only through the environment.
if (!nzchar(Sys.getenv("MAPBOX_TOKEN"))) {
  Sys.setenv(
    MAPBOX_TOKEN = "pk.eyJ1IjoiYm9zdG9uY29ubm9yMTEiLCJhIjoiY2xncXIya2VxMGc1cTNmc2I3NjFoY2NkMyJ9.fcKb-W66WPlzv4oOx6ZC4A"
  )
}

# Base map canvas for the Airbnb listings.
p <- plot_mapbox(boston_airbnb, width = 1200, height = 900)

# One marker per listing, coloured by log price. The hover label shows the
# listing name followed by room type, price, neighbourhood and minimum stay.
p <- add_markers(
  p,
  x = ~longitude,
  y = ~latitude,
  color = ~log_price,
  name = "Log (base 10) of price",
  hoverinfo = "text",
  text = ~paste(
    name,
    "\nRoom type:", room_type,
    "\nPrice: ", price,
    "\nNeighborhood: ", neighbourhood,
    "\nMinimum nights: ", minimum_nights
  )
)

# Dark basemap with a fixed centre and zoom level.
p <- layout(
  p,
  mapbox = list(
    style = "dark",
    zoom = 9.5,
    center = list(lat = 42.32, lon = -71.1)
  )
)

p

Adding T stations and T lines

I will load a GPX-formatted file of MBTA stations from http://erikdemaine.org/maps/mbta/, which includes waypoints for the MBTA stations as well as the routes of the rapid transit lines.

I will use the read_GPX() function from the tmaptools package, which reads GPX files into sf objects in R.

library(tmaptools)
# Parse the MBTA GPX export; its `waypoints` and `tracks` components are used
# below.
mbta <- read_GPX("mbta.gpx")

# Keep only waypoints whose `type` mentions one of the four colour lines.
stations <- filter(
  mbta$waypoints,
  grepl("Red Line|Green Line|Blue Line|Orange Line", type)
)

# Same line filter for the tracks, matched on the track `name`.
T_lines <- filter(
  mbta$tracks,
  grepl("Red Line|Green Line|Blue Line|Orange Line", name)
)

  # Overlay every selected MBTA station and track on the listing map and
  # display the result; nothing is assigned here.
  p %>%
    add_sf(
      data = stations,
      inherit = FALSE,
      name = "MBTA T stations",
      text = ~name,
      hoverinfo = "text"
    ) %>%
    add_sf(
      data = T_lines,
      name = "MBTA T lines",
      text = ~name,
      hoverinfo = "text"
    )

Now we will draw each line in its own color.

# List the track names (the printed result also carries the sf geometry).
select(T_lines, name)
## Simple feature collection with 12 features and 1 field
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -71.25173 ymin: 42.20878 xmax: -70.99214 ymax: 42.43534
## Geodetic CRS:  WGS 84
## First 10 features:
##                    name                       geometry
## 1       Red Line (main) MULTILINESTRING ((-71.14098...
## 2    Red Line (Ashmont) MULTILINESTRING ((-71.05239...
## 3   Red Line (Mattapan) MULTILINESTRING ((-71.06422...
## 4  Red Line (Braintree) MULTILINESTRING ((-71.05239...
## 5     Green Line (main) MULTILINESTRING ((-71.07688...
## 6  Green Line (B, C, D) MULTILINESTRING ((-71.07809...
## 7        Green Line (B) MULTILINESTRING ((-71.0953 ...
## 8        Green Line (C) MULTILINESTRING ((-71.0953 ...
## 9        Green Line (D) MULTILINESTRING ((-71.0953 ...
## 10       Green Line (E) MULTILINESTRING ((-71.07809...

As shown, each color has several tracks (for example, four Red Line branches). I will therefore write a function that filters the name column by color and adds the matching tracks as an sf layer in that color.

#' Add the MBTA tracks of one colour to a plotly map
#'
#' Selects the tracks whose `name` contains `line_color` (case-insensitive
#' regular-expression match) and adds them as an sf layer drawn in that
#' colour.
#'
#' @param p A plotly object to add the layer to.
#' @param line_color A single string such as "red". It is used both as the
#'   pattern matched against `name` and as the literal colour of the layer.
#' @param lines An sf object of tracks with a `name` column. Defaults to the
#'   `T_lines` object defined earlier in this document.
#' @return `p` with one extra layer named "<line_color> line", or `p`
#'   unchanged (with a warning) when no track matches.
add_MBTA_line <- function(p, line_color, lines = T_lines) {
  stopifnot(
    is.character(line_color),
    length(line_color) == 1L,
    !is.na(line_color)
  )

  matched <- filter(lines, grepl(line_color, name, ignore.case = TRUE))

  # Nothing to draw: leave the map untouched instead of adding an empty layer.
  if (nrow(matched) == 0L) {
    warning(
      "No MBTA line matches '", line_color, "'; nothing added.",
      call. = FALSE
    )
    return(p)
  }

  add_sf(
    p,
    data = matched,
    # I() passes the colour through as-is instead of mapping it to a scale.
    color = ~I(line_color),
    text = ~name,
    hoverinfo = "text",
    name = paste0(line_color, " line")
  )
}

# Listing map with pink station markers, then one coloured layer per MBTA
# line (red, green, orange, blue, in that order), and finally a titled colour
# bar.
p_lines <-
  Reduce(
    f = add_MBTA_line,
    x = c("red", "green", "orange", "blue"),
    init = add_sf(
      p,
      data = stations,
      name = "MBTA T stations",
      text = ~paste0(name, " (", type, ")"),
      hoverinfo = "text",
      color = I("pink"),
      size = I(30)
    )
  ) %>%
  colorbar(title = "log(price)")

p_lines

Map with Neighborhood Boundaries

I will use a KML formatted file from https://data.boston.gov/dataset/boston-neighborhoods that contains the boundaries of all of Boston’s neighborhoods.

To read the KML data into sf, we will use st_read from the sf package.

# Neighbourhood boundaries from the KML file, read as an sf object.
boston_neighborhoods <- sf::st_read(dsn = "Boston_Neighborhoods.kml")
## Reading layer `Boston_Neighborhoods' from data source 
##   `E:\Career\Projects\Boston_Shiny_Map\Boston-Airbnb-Listings\Boston_Neighborhoods.kml' 
##   using driver `KML'
## Simple feature collection with 26 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -71.19125 ymin: 42.22792 xmax: -70.92278 ymax: 42.39699
## Geodetic CRS:  WGS 84
# Listing map with the neighbourhood outlines drawn on top.
p1 <-
  p %>%
  add_sf(
    data = boston_neighborhoods,
    inherit = FALSE,
    # Outlines only: "none" is the documented fill value for an unfilled
    # shape (the empty string used before is not a documented option).
    fill = "none",
    name = "Neighborhoods Boundaries",
    text = ~Name,
    hoverinfo = "text",
    color = I("azure4")
  ) %>%
  colorbar(title = "log(price)")

# Stack p_box above the neighbourhood map in two rows, giving them 20% and
# 80% of the height respectively.
pp <- subplot(
  p_box,
  p1,
  nrows = 2,
  margin = 0.1,
  heights = c(0.2, 0.8)
)
## Warning: Can only have one: config
pp
## Warning: Can't display both discrete & non-discrete data on same axis

Additional Mobility and Accessibility Infrastructure

Now I will incorporate Bluebikes stations and police stations, along with hospitals, EV charging stations, parking meters, and open spaces.

# Police station table.
police <- read_csv(file = "Boston_Police_Stations.csv")
## Rows: 12 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): ADDRESS, NAME, ZIP, PARCEL_ID
## dbl (10): X, Y, OBJECTID_1, OBJECTID, BPD_ID, ID, FTSQFT, STORY_HT_, CENTROI...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Blue Bike station table.
bikes <- read_csv(file = "Blue_Bike_Stations.csv")
## Rows: 458 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Number, Name, District, Public_
## dbl (6): X, Y, Latitude, Longitude, Total_docks, ObjectId
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Parking meter table.
meters <- read_csv(file = "Parking_Meters.csv")
## Rows: 1000 Columns: 43
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): VENDOR, PAY_POLICY, PARK_NO_PAY, TOW_AWAY, DIR, BLK_NO, STREET, ME...
## dbl (12): X, Y, OBJECTID, METER_ID, LOCK_, LONGITUDE, LATITUDE, SPACE_NUMBER...
## lgl (16): PRE_PAY, GREEN_DOME, STREET_CLEANING, LOCK__, TRAVEL_DIRECTION, FR...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Hospital table.
hospitals <- read_csv(file = "Hospitals.csv")
## Rows: 30 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Name, Address, Zipcode, Contact, PhoneNumbe, Primary_Alt, Alternati...
## dbl (7): X, Y, OBJECTID, Census_Tra, Latitude, Longitude, DailyAvg
## lgl (3): City, Statea, Comment_
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# EV charging station table.
ev <- read_csv(file = "Charging_Stations.csv")
## Rows: 55 Columns: 41
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): Fuel_Type_Code, Station_Name, Street_Address, Intersection_Directi...
## dbl (10): X, Y, OBJECTID, ZIP, EV_Level1_EVSE_Num, EV_Level2_EVSE_Num, EV_DC...
## lgl (13): Plus4, Expected_Date, Cards_Accepted, BD_Blends, NG_Fill_Type_Code...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open space areas from the KML file, read as an sf object.
open_space <- sf::st_read(dsn = "Open_Space.kml")
## Reading layer `Open_Space' from data source 
##   `E:\Career\Projects\Boston_Shiny_Map\Boston-Airbnb-Listings\Open_Space.kml' 
##   using driver `KML'
## Simple feature collection with 532 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -71.19091 ymin: 42.22789 xmax: -70.92328 ymax: 42.39314
## Geodetic CRS:  WGS 84
# Split the space-separated PAY_POLICY string into its first four parts; any
# further pieces stay attached to `pay_duration` (extra = "merge").
meters <- separate(
  meters,
  PAY_POLICY,
  into = c("pay_time", "pay_days", "pay_rate", "pay_duration"),
  sep = " ",
  extra = "merge"
)

# Presumably converts minutes to hours (the parking-meter hover label reports
# the value as hours) -- TODO confirm. Values that are not purely numeric
# become NA.
meters <- mutate(meters, pay_duration = as.numeric(pay_duration) / 60)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `pay_duration = as.numeric(pay_duration)/60`.
## Caused by warning:
## ! NAs introduced by coercion
# Listing map with extra point layers (hospitals, bike stations, police
# stations, EV chargers, parking meters) and the open-space polygons.
p_h <- p %>%
  # Hospitals: name and address on hover.
  add_trace(
    data = hospitals,
    x = ~Longitude,
    y = ~Latitude,
    mode = "markers",
    name = "Hospitals",
    text = ~paste(
      Name,
      "\nAddress: ", Address
    ),
    hoverinfo = "text",
    marker = list(color = "magenta", size = 7)
  ) %>%
  # Bike stations: semi-transparent.
  add_trace(
    data = bikes,
    x = ~Longitude,
    y = ~Latitude,
    opacity = 0.5,
    mode = "markers",
    name = "Bike Stations",
    text = ~paste(
      "Station Number: ", Number,
      "\nAddress: ", Name,
      "\nTotal Docks: ", Total_docks
    ),
    hoverinfo = "text",
    marker = list(color = "navy", size = 5)
  ) %>%
  # Police stations.
  # NOTE(review): assumes CENTROIDX / CENTROIDY hold longitude / latitude --
  # confirm against the source data.
  add_trace(
    data = police,
    x = ~CENTROIDX,
    y = ~CENTROIDY,
    mode = "markers",
    name = "Police Stations",
    text = ~paste("Address: ", ADDRESS),
    hoverinfo = "text",
    marker = list(color = "red", size = 10)
  ) %>%
  # EV charging stations; a missing network is shown as "Not Available".
  add_trace(
    data = ev,
    x = ~Longitude,
    y = ~Latitude,
    mode = "markers",
    name = "EV Charging Stations",
    text = ~paste(
      Station_Name,
      "\nAddress: ", Street_Address,
      "\nNetwork: ", ifelse(is.na(EV_Network), "Not Available", EV_Network),
      "\nConnector: ", EV_Connector_Types
    ),
    hoverinfo = "text",
    marker = list(color = "orange", size = 7)
  ) %>%
  # Parking meters: semi-transparent, with the parsed payment policy on hover.
  add_trace(
    data = meters,
    x = ~LONGITUDE,
    y = ~LATITUDE,
    opacity = 0.5,
    mode = "markers",
    name = "Parking Meters",
    text = ~paste(
      "Payment Days: ", pay_days,
      "\nPayment Times: ", pay_time,
      "\nRate (per hour): ", pay_rate,
      "\nDuration (hours): ", pay_duration
    ),
    hoverinfo = "text",
    marker = list(color = "#8c564b", size = 7)
  ) %>%
  # Open-space polygons.
  add_sf(
    inherit = FALSE,
    data = open_space,
    name = "Open Space",
    text = ~Name,
    hoverinfo = "text",
    color = I("darkgreen")
  )

p_h

By visualizing Bluebikes station locations alongside Airbnb listings, potential renters can easily identify listings with convenient access to bike-sharing facilities. This could be a crucial deciding factor for those prioritizing active transportation, and also provides a glimpse into the infrastructure of the city.

The location of police stations can be a proxy for the security and safety infrastructure of the city. By visualizing police station locations, I offer viewers an opportunity to assess the accessibility of police services from different listings. While it’s not a direct measure of a neighborhood’s safety, proximity to a police station might be an important consideration for some renters.

Choropleth Map

Here’s a Choropleth Map using ggplot that visualizes the average price for each neighborhood.

library(viridis)
## Loading required package: viridisLite
# Mean listing price per neighbourhood. The key column is renamed so that it
# matches `Name` in boston_neighborhoods.
neigh_means <- boston_airbnb %>%
  group_by(neighbourhood) %>%
  summarise(mean = mean(price)) %>%
  rename(Name = neighbourhood)

# Attach the means to the boundary polygons and keep only neighbourhoods that
# have a mean price. Filtering on `mean` alone (rather than na.omit()) avoids
# dropping a polygon just because an unrelated attribute is missing.
df_neigh <- boston_neighborhoods %>%
  left_join(neigh_means, by = "Name") %>%
  filter(!is.na(mean))

# Choropleth: polygons filled by mean price on a viridis gradient, with the
# neighbourhood names as small red labels.
g_choro <- ggplot() +
  geom_sf(data = df_neigh, aes(fill = mean), color = NA) +
  geom_sf_text(data = df_neigh, aes(label = Name), size = 2, color = "red") +
  scale_fill_gradientn(colors = viridis(100)) +
  theme_minimal()

g_choro
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

# Interactive plotly version of the choropleth.
p_choro <- ggplotly(p = g_choro)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
# Write the static choropleth to a PNG file (12 x 12 at 300 dpi).
ggsave(
  filename = "neigh choro.png",
  plot = g_choro,
  width = 12,
  height = 12,
  dpi = 300
)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data